package crawler.jiliqiche;

import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zql.entity.AgencyEntity;

import dao.AgencyDao;
import util.HttpConnectionGet;
import util.Location;
import util.MybatisTool;
import util.TianYanCha;
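// NOTE(review): originally marked "异常" (abnormal / exception) — presumably flags this crawler as failing; confirm.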
/**
 * Crawls the dealer-network pages under {@code mall.geely.com} and turns every dealer
 * entry into an {@link AgencyEntity}.
 *
 * <p>The crawl walks three levels: the province options on the network home page, the
 * city options returned for each province, and the dealer list returned for each city.
 * A province or city whose response cannot be interpreted is skipped instead of aborting
 * the whole crawl.
 */
public class JiLiQiCheCrawler {
	/** Page whose {@code sel_region} select element lists the provinces. */
	private static final String HOME_URL = "http://mall.geely.com/index.php/network";
	/** Endpoint prefix; the province code is appended to obtain that province's city options. */
	private static final String CITY_URL_PREFIX = "http://mall.geely.com/index.php/dealer/change_resion/";
	/** Endpoint prefix/suffix; the city code goes in between to obtain that city's dealers. */
	private static final String DEALER_URL_PREFIX = "http://mall.geely.com/index.php/dealer/dealer_resion_map?cid=";
	private static final String DEALER_URL_SUFFIX = "&carx=0&kw=";

	/**
	 * Crawls every province and city and collects the dealers found.
	 *
	 * @return the dealers found, in crawl order; never {@code null}
	 * @throws IllegalStateException if the home page has no {@code sel_region} element
	 * @throws RuntimeException wrapping the {@link IOException} if a request fails
	 */
	public List<AgencyEntity> getAgency() {
		List<AgencyEntity> list = new ArrayList<AgencyEntity>();
		try {
			Document home = Jsoup.connect(HOME_URL).get();
			Element proSelect = home.getElementById("sel_region");
			if (proSelect == null) {
				// Fail with a descriptive error instead of a bare NullPointerException.
				throw new IllegalStateException("Province selector 'sel_region' not found at " + HOME_URL);
			}
			Elements proOptions = proSelect.getElementsByTag("option");
			// Index 0 is deliberately skipped — presumably a placeholder option; confirm against the page.
			for (int i = 1; i < proOptions.size(); i++) {
				Element proOption = proOptions.get(i);
				String proNum = proOption.attr("value");
				String proName = proOption.text();
				for (Element cityOption : fetchCityOptions(proNum)) {
					String cityNum = cityOption.attr("value");
					String cityName = cityOption.text();
					collectDealers(cityNum, proName, cityName, list);
				}
			}
		} catch (IOException e) {
			// The cause is preserved in the wrapper, so it is not printed separately here.
			throw new RuntimeException("爬虫异常", e);
		}
		return list;
	}

	/**
	 * Fetches the city {@code <option>} elements for one province.
	 *
	 * <p>The endpoint answers with a JSON string literal whose content is HTML with
	 * unicode escapes, e.g. {@code "<option value='110100'>\u5317\u4eac<\/option>"}.
	 * Parsing it as JSON strips the quotes and decodes the escapes.
	 *
	 * @param proNum province code taken from the option's {@code value} attribute
	 * @return the city options; empty when the response is not a JSON string
	 * @throws IOException if the request helper reports an I/O failure
	 */
	private static Elements fetchCityOptions(String proNum) throws IOException {
		String response = HttpConnectionGet.getJson(CITY_URL_PREFIX + proNum);
		Object decoded = JSON.parse(response);
		if (!(decoded instanceof String)) {
			// Covers a null response as well as an unexpected JSON object/array.
			return new Elements();
		}
		Document citySelect = Jsoup.parse((String) decoded);
		return citySelect.getElementsByTag("option");
	}

	/**
	 * Fetches the dealers of one city and appends an entity for each to {@code sink}.
	 * A response without a {@code data} array contributes nothing.
	 *
	 * @param cityNum city code used as the {@code cid} query parameter
	 * @param proName province name stored on each entity
	 * @param cityName city name stored on each entity
	 * @param sink list receiving the created entities
	 * @throws IOException if the request helper reports an I/O failure
	 */
	private static void collectDealers(String cityNum, String proName, String cityName,
			List<AgencyEntity> sink) throws IOException {
		String dealerJson = HttpConnectionGet.getJson(DEALER_URL_PREFIX + cityNum + DEALER_URL_SUFFIX);
		JSONObject json = JSON.parseObject(dealerJson);
		if (json == null) {
			return;
		}
		JSONArray data = json.getJSONArray("data");
		if (data == null) {
			return;
		}
		for (int k = 0; k < data.size(); k++) {
			sink.add(buildAgency(data.getJSONObject(k), proName, cityName));
		}
	}

	/**
	 * Builds one entity from a dealer JSON object, reading its {@code title},
	 * {@code contact}, {@code address} and {@code lal} fields.
	 *
	 * @param dealer one element of the response's {@code data} array
	 * @param proName province name stored on the entity
	 * @param cityName city name stored on the entity
	 * @return the populated entity
	 */
	private static AgencyEntity buildAgency(JSONObject dealer, String proName, String cityName) {
		String name = dealer.getString("title");
		String sellTell = dealer.getString("contact");
		String address = dealer.getString("address");
		String[] lngLat = resolveCoordinates(dealer.getString("lal"), address);

		// Index 0 is used as the controlling shareholder and index 1 as the other
		// shareholders; a missing entry becomes null rather than aborting the crawl.
		List<String> shareholder = TianYanCha.getShareholder(name);
		String sControllingShareholder = elementOrNull(shareholder, 0);
		String sOtherShareholders = elementOrNull(shareholder, 1);

		AgencyEntity agency = new AgencyEntity();
		agency.setdCloseDate(null);
		agency.setdOpeningDate(null);
		agency.setdUpdateTime(new Timestamp(System.currentTimeMillis()));
		agency.setnBrandID(-1);
		agency.setsBrand("吉利");

		agency.setnDealerIDWeb(-1);
		agency.setnManufacturerID(-1);
		agency.setsManufacturer("吉利汽车");

		agency.setnState(1);
		agency.setsAddress(address);
		agency.setsCity(cityName);
		agency.setsCustomerServiceCall(null);
		agency.setsDealerName(name);
		agency.setsDealerType(null);
		agency.setsProvince(proName);
		agency.setsSaleCall(sellTell);
		agency.setsLongitude(lngLat[0]);
		agency.setsLatitude(lngLat[1]);
		agency.setsControllingShareholder(sControllingShareholder);
		agency.setsOtherShareholders(sOtherShareholders);
		return agency;
	}

	/**
	 * Determines a dealer's longitude and latitude.
	 *
	 * <p>Uses the comma-separated {@code lal} value when it has at least two parts;
	 * otherwise asks {@code Location.getLocation} once (presumably a geocoding lookup —
	 * confirm) and uses its first two entries.
	 *
	 * @param lngAndLat the dealer's {@code lal} value, may be {@code null}
	 * @param address the dealer's address, passed to the lookup on fallback
	 * @return a two-element array {@code {longitude, latitude}}; both are empty strings
	 *         when neither source yields two values
	 */
	private static String[] resolveCoordinates(String lngAndLat, String address) {
		if (lngAndLat != null) {
			String[] parts = lngAndLat.split(",");
			if (parts.length >= 2) {
				return new String[] { parts[0], parts[1] };
			}
		}
		// Single lookup: the result is reused for both longitude and latitude.
		String[] located = Location.getLocation(address);
		if (located != null && located.length >= 2) {
			return new String[] { located[0], located[1] };
		}
		return new String[] { "", "" };
	}

	/** Returns {@code values.get(index)}, or {@code null} when the list is null or too short. */
	private static String elementOrNull(List<String> values, int index) {
		if (values == null || index >= values.size()) {
			return null;
		}
		return values.get(index);
	}

	/**
	 * Runs the crawl and saves every dealer through {@code MybatisTool}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		JiLiQiCheCrawler crawler = new JiLiQiCheCrawler();
		System.out.println("爬虫开始...");
		List<AgencyEntity> agencys = crawler.getAgency();
		System.out.println("抓取完毕,正在存库");
		try {
			for (AgencyEntity agency : agencys) {
				MybatisTool.save(agency);
			}
		} finally {
			// Close even when a save fails, so the persistence helper is always released.
			MybatisTool.close();
		}
		System.out.println("请查看数据库");
	}
}
